DNA methylation-based diagnosis and prognosis of pediatric AML#

We propose to leverage machine learning tools to develop DNA methylation-based signatures of clinical utility in pediatric AML.

The AML Methylome#

Interactive visualization of the diagnostic map of AML for pediatric/adolescent/young adult patients based solely on DNA methylation.

Hide code cell outputs
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18616\2526871669.py in <module>
      5 output_path = '../Data/Processed_Data/'
      6 
----> 7 x_train = pd.read_pickle(PaCMAP_path+'embedding.pkl')
      8 x_test = pd.read_pickle(PaCMAP_path+'embedding_test.pkl')
      9 

c:\users\flourenco\appdata\local\programs\python\python37\lib\site-packages\pandas\io\pickle.py in read_pickle(filepath_or_buffer, compression, storage_options)
    215                     # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]";
    216                     # expected "IO[bytes]"
--> 217                     return pickle.load(handles.handle)  # type: ignore[arg-type]
    218             except excs_to_catch:
    219                 # e.g.

ValueError: unsupported pickle protocol: 5
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource, Legend
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (1000x600) figure for the methylome atlas.

    The figure carries the atlas title, PaCMAP axis labels, the
    pan / wheel-zoom / reset / save tools, and tooltips that read the
    "WHO Classification" and "Karyotype" columns of the plotted source.
    """
    hover_fields = [("Diagnosis", "@{WHO Classification}"),
                    ("Karyotype", "@Karyotype")]
    options = dict(
        title='The Pediatric AML Methylome Atlas',
        width=1000,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)
# Draw one scatter renderer per category of `hue` and collect legend entries
def create_scatters(df, p, hue):
    """Plot `df` on figure `p` with a separate renderer per `hue` value.

    Rows whose `hue` value is NaN are dropped. Categories are handled in
    descending order of frequency; the i-th category is drawn with
    ``custom_color_palette[i]``.

    Returns
    -------
    tuple
        ``(renderers, items)``: the renderers returned by ``p.scatter`` and
        a parallel list of ``(category_value, [renderer])`` pairs that the
        caller passes to ``Legend(items=...)``.
    """
    labelled = df[df[hue].notna()]
    categories = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    subsets = [labelled[labelled[hue] == category] for category in categories]

    renderers = []
    items = []
    for position, subset in enumerate(subsets):
        # Every row of the subset shares the same hue value; take the first
        name = subset[hue].values[0]
        color = custom_color_palette[position]
        source = ColumnDataSource(subset)
        renderer = p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=source,
                             fill_alpha=0.8, size=5,
                             color=color)
        renderers.append(renderer)
        items.append((name, [renderer]))

    return renderers, items

# Create one tab (with its own empty figure) per label column; the last
# entry of `cols` is not given a tab
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')

# Fill each tab's figure, coloured by the column the tab is named after
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]
for tab, (renderers, items) in zip(tabs.tabs, points):
    tab.child.toolbar.logo = None
    tab.child.toolbar_location = 'above'
    # Place the legend outside the plotting area, to the right of the figure
    legend = Legend(items=items, location='top_left')
    tab.child.add_layout(legend, 'right')
    tab.child.legend.click_policy = 'hide'

# One slider drives the glyph size of every renderer in every tab; its
# initial value is the size of the very first renderer
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0][0][0].glyph.size)
for renderers, _ in points:
    for renderer in renderers:
        slider.js_link("value", renderer.glyph, "size")

# Bind the result to its own name: assigning it to `layout` would overwrite
# the imported bokeh.layouts.layout function for every later call
page = layout([[[tabs, slider]]])

show(page)
from bokeh.layouts import layout, gridplot
from bokeh.models import ColumnDataSource

# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "WHO Classification" and "Karyotype" columns of the
    plotted source.
    """
    hover_fields = [("Diagnosis", "@{WHO Classification}"),
                    ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)

def create_scatters(df, p, hue):
    """Plot `df` on figure `p` with a separate renderer per `hue` value.

    Rows whose `hue` value is NaN are dropped. Categories are handled in
    descending order of frequency; the i-th category is drawn with
    ``custom_color_palette[i]`` and labelled in the figure's legend with
    the category value.

    Returns the list of renderers produced by ``p.scatter``.
    """
    labelled = df[df[hue].notna()]
    categories = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    subsets = [labelled[labelled[hue] == category] for category in categories]

    renderers = []
    for position, subset in enumerate(subsets):
        # Every row of the subset shares the same hue value; take the first
        name = subset[hue].values[0]
        color = custom_color_palette[position]
        source = ColumnDataSource(subset)
        renderer = p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=source,
                             fill_alpha=0.8, size=5,
                             color=color, legend_label=name)
        renderers.append(renderer)

    return renderers

# Create two independent sets of tabs (one figure per label column each) so
# that two colourings of the map can be shown side by side
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols)]

# NOTE(review): the original call had no value for tabs_location (a syntax
# error); 'left' is used to match the first tab set - confirm intended side
tabs2 = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
             tabs_location='left')
points2 = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs2.tabs, cols)]

# Style the figures of BOTH tab sets (the second set was previously skipped
# because the loop iterated over `tabs` twice)
for tab in [*tabs.tabs, *tabs2.tabs]:
    tab.child.toolbar.logo = None
    tab.child.legend.click_policy = 'hide'

# One slider drives the glyph size of every renderer in both tab sets
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0][0].glyph.size)
for renderers in [*points, *points2]:
    for renderer in renderers:
        slider.js_link("value", renderer.glyph, "size")

grid = gridplot([[tabs, tabs2]])
# Bind the result to its own name: assigning it to `layout` would overwrite
# the imported bokeh.layouts.layout function for every later call
page = layout([[[grid, slider]]])

show(page)
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource

# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "WHO Classification" and "Karyotype" columns of the
    plotted source.
    """
    hover_fields = [("Diagnosis", "@{WHO Classification}"),
                    ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)

def create_scatters(df, p, hue):
    """Plot `df` on figure `p` with a separate renderer per `hue` value.

    Rows whose `hue` value is NaN are dropped. Categories are handled in
    descending order of frequency; the i-th category is drawn with
    ``custom_color_palette[i]`` and labelled in the figure's legend with
    the category value.

    Returns the list of renderers produced by ``p.scatter``.
    """
    labelled = df[df[hue].notna()]
    categories = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    subsets = [labelled[labelled[hue] == category] for category in categories]

    renderers = []
    for position, subset in enumerate(subsets):
        # Every row of the subset shares the same hue value; take the first
        name = subset[hue].values[0]
        color = custom_color_palette[position]
        source = ColumnDataSource(subset)
        renderer = p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=source,
                             fill_alpha=0.8, size=5,
                             color=color, legend_label=name)
        renderers.append(renderer)

    return renderers

# Create one tab (with its own empty figure) per label column; the last
# entry of `cols` is not given a tab
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')

# Fill each tab's figure; zip stops at the shorter input, so the extra last
# entry of `cols` is ignored
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols)]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None
    tab.child.legend.click_policy = 'hide'

# One slider drives the glyph size of every renderer in every tab; its
# initial value is the size of the very first renderer
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0][0].glyph.size)
for renderers in points:
    for renderer in renderers:
        slider.js_link("value", renderer.glyph, "size")

# Bind the result to its own name: assigning it to `layout` would overwrite
# the imported bokeh.layouts.layout function for every later call
page = layout([[[tabs, slider]]])

show(page)
# Inspect the glyph of the first renderer created for the first tab
points[0][0].glyph
Scatter(
id = 'p18018', …)
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource

# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "WHO Classification" and "Karyotype" columns of the
    plotted source.
    """
    hover_fields = [("Diagnosis", "@{WHO Classification}"),
                    ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)

def create_scatters(df, p, hue):
    """Plot `df` on figure `p` with a separate renderer per `hue` value.

    Rows whose `hue` value is NaN are dropped. Categories are handled in
    descending order of frequency; the i-th category is drawn with
    ``custom_color_palette[i]`` and labelled in the figure's legend with
    the category value.

    Returns the figure `p` itself (not the renderers added to it).
    """
    labelled = df[df[hue].notna()]
    categories = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    subsets = [labelled[labelled[hue] == category] for category in categories]

    for position, subset in enumerate(subsets):
        # Every row of the subset shares the same hue value; take the first
        name = subset[hue].values[0]
        color = custom_color_palette[position]
        source = ColumnDataSource(subset)
        p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=source,
                  fill_alpha=0.8, size=5,
                  color=color, legend_label=name)
    return p

# Create one tab (with its own empty figure) per label column; the last
# entry of `cols` is not given a tab
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')

# In this variant create_scatters returns the figure, so `points` holds
# figures rather than glyph renderers
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols)]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None
    tab.child.legend.click_policy = 'hide'

# The size slider is disabled here: `points` holds figures, which have no
# `.glyph` attribute, so the links below cannot work as written
# Define a slider for adjusting the size of the data points
# slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0].glyph.size)
# for p in points:
#     slider.js_link("value", p.glyph, "size")


# Bind the result to its own name: assigning it to `layout` would overwrite
# the imported bokeh.layouts.layout function for every later call
page = layout([[[tabs]]])

show(page)
# NOTE: raises AttributeError in this cell - create_scatters above returns
# the figure `p`, and a figure has no `glyph` attribute
points[0].glyph
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[12], line 1
----> 1 points[0].glyph

File c:\Users\flourenco\Desktop\projects\Methylation_Project\Code\.venv_data_vizualization\lib\site-packages\bokeh\core\has_props.py:360, in HasProps.__getattr__(self, name)
    357 if isinstance(descriptor, property): # Python property
    358     return super().__getattribute__(name)
--> 360 self._raise_attribute_error_with_matches(name, properties)

File c:\Users\flourenco\Desktop\projects\Methylation_Project\Code\.venv_data_vizualization\lib\site-packages\bokeh\core\has_props.py:368, in HasProps._raise_attribute_error_with_matches(self, name, properties)
    365 if not matches:
    366     matches, text = sorted(properties), "possible"
--> 368 raise AttributeError(f"unexpected attribute {name!r} to {self.__class__.__name__}, {text} attributes are {nice_join(matches)}")

AttributeError: unexpected attribute 'glyph' to figure, possible attributes are above, align, aspect_ratio, aspect_scale, background_fill_alpha, background_fill_color, below, border_fill_alpha, border_fill_color, center, classes, context_menu, css_classes, disabled, extra_x_ranges, extra_x_scales, extra_y_ranges, extra_y_scales, flow_mode, frame_align, frame_height, frame_width, height, height_policy, hidpi, hold_render, inner_height, inner_width, js_event_callbacks, js_property_callbacks, left, lod_factor, lod_interval, lod_threshold, lod_timeout, margin, match_aspect, max_height, max_width, min_border, min_border_bottom, min_border_left, min_border_right, min_border_top, min_height, min_width, name, outer_height, outer_width, outline_line_alpha, outline_line_cap, outline_line_color, outline_line_dash, outline_line_dash_offset, outline_line_join, outline_line_width, output_backend, renderers, reset_policy, resizable, right, sizing_mode, styles, stylesheets, subscribed_events, syncable, tags, title, title_location, toolbar, toolbar_inner, toolbar_location, toolbar_sticky, visible, width, width_policy, x_range, x_scale, y_range or y_scale
from bokeh.layouts import layout


# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "WHO Classification" and "Karyotype" columns of the
    plotted source.
    """
    hover_fields = [("Diagnosis", "@{WHO Classification}"),
                    ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)

# Colour a single scatter renderer by the categories of one column
def create_scatter(df, p, hue):
    """Draw `df` on figure `p` as one scatter renderer coloured by `hue`.

    Rows with a NaN `hue` value are dropped. Colours are assigned through
    ``factor_cmap`` using ``custom_color_palette``, with the factors ordered
    by descending frequency, and the legend is grouped by `hue`.

    Returns the renderer produced by ``p.scatter``.
    """
    labelled = df[df[hue].notna()]
    factors = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    colour = factor_cmap(field_name=hue, palette=custom_color_palette,
                         factors=factors)
    return p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=labelled.copy(),
                     fill_alpha=0.8, size=5,
                     color=colour,
                     legend_group=hue)

# Create one tab (with its own empty figure) per label column; the last
# entry of `cols` is not given a tab
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')

# Fill each tab's figure; zip stops at the shorter input, so the extra last
# entry of `cols` is ignored
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols)]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the glyph size of the renderer in every tab; its initial
# value is the size of the first renderer
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")


# Bind the result to its own name: assigning it to `layout` would overwrite
# the imported bokeh.layouts.layout function for every later call
page = layout([[[tabs, slider]]])

show(page)
import pandas as pd
from bokeh.palettes import Spectral4
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT

# Stand-alone legend demo: one line per stock, hidden or shown by clicking
# its legend entry
p = figure(width=800, height=250, x_axis_type="datetime")
p.title.text = 'Click on legend entries to hide the corresponding lines'

for data, name, color in zip([AAPL, IBM, MSFT, GOOG], ["AAPL", "IBM", "MSFT", "GOOG"], Spectral4):
    # Use a dedicated name for the per-stock frame: the other cells of this
    # notebook keep their plotting data in `df`, which must not be
    # overwritten by this demo
    stock_df = pd.DataFrame(data)
    stock_df['date'] = pd.to_datetime(stock_df['date'])
    p.line(stock_df['date'], stock_df['close'], line_width=2, color=color, alpha=0.8, legend_label=name)

p.legend.location = "top_left"
p.legend.click_policy = "hide"

show(p)
from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.layouts import gridplot
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
                          LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool,
                          CategoricalColorMapper)

# Low-level (bokeh.models) version of the map, with the legend drawn in a
# separate plot placed next to the scatter plot
source = ColumnDataSource(data=df)

# Map each 'WHO Classification' category (most frequent first) to a colour
# of the custom palette
color_mapper = CategoricalColorMapper(factors=df['WHO Classification'].value_counts().sort_values(
                                           ascending=False).index.to_list(),
                                      palette=custom_color_palette)

xdr = DataRange1d()
ydr = DataRange1d()

plot = Plot(
    x_range=xdr, y_range=ydr,
    width=600, height=600,
    min_border=0,
    toolbar_location='right',
)

# The fill colour must read the same column the mapper's factors were built
# from; the previous placeholder field name 'category_column' did not match
circle = Circle(x="PaCMAP 1", y="PaCMAP 2", size=6,
                line_color="black", fill_alpha=0.6,
                fill_color={'field': 'WHO Classification', 'transform': color_mapper})
# From here on `circle` refers to the renderer returned by add_glyph
circle = plot.add_glyph(source, circle)

pan = PanTool()
wheel_zoom = WheelZoomTool()
preview_save = SaveTool()

plot.add_tools(pan, wheel_zoom, preview_save)

# Add axes
plot.add_layout(LinearAxis(), 'below')
plot.add_layout(LinearAxis(), 'left')
plot.add_layout(LinearAxis(), 'right')

# Legend with a single entry pointing at the scatter renderer
legend = Legend(
    items=[("PaCMAP Output", [circle])],
    location="center", orientation="vertical",
    border_line_color="black",
    title='Example Title'
)

# Separate, toolbar-less plot that only hosts the legend
legend_plot = Plot(
    width=200, height=600,
    toolbar_location=None,
)
legend_plot.add_layout(legend, 'center')

# Combine the two plots into a gridplot
grid = gridplot([[plot, legend_plot]])

show(grid)
from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.layouts import gridplot
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
                          LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool)

# Low-level (bokeh.models) scatter of `df`, with the legend placed in a
# second plot shown next to it
source = ColumnDataSource(data=df)

xdr, ydr = DataRange1d(), DataRange1d()

plot = Plot(x_range=xdr, y_range=ydr,
            width=600, height=600,
            min_border=0,
            toolbar_location='right')

# `circle` ends up bound to the renderer returned by add_glyph
circle = plot.add_glyph(
    source,
    Circle(x="PaCMAP 1", y="PaCMAP 2", size=6,
           line_color="red", fill_color="orange", fill_alpha=0.6),
)

pan, wheel_zoom, preview_save = PanTool(), WheelZoomTool(), SaveTool()
plot.add_tools(pan, wheel_zoom, preview_save)

# One linear axis on each of three sides of the plot
for side in ('below', 'left', 'right'):
    plot.add_layout(LinearAxis(), side)

# Legend with a single entry pointing at the scatter renderer
legend = Legend(items=[("PaCMAP Output", [circle])],
                location="center", orientation="vertical",
                border_line_color="black",
                title='Example Title')

# Toolbar-less plot that only hosts the legend
legend_plot = Plot(width=600, height=600, toolbar_location=None)
legend_plot.add_layout(legend, 'left')

# Show the scatter plot and the legend plot side by side
grid = gridplot([[plot, legend_plot]])

show(grid)
WARNING:bokeh.core.validation.check:W-1000 (MISSING_RENDERERS): Plot has no renderers: Plot(id='p3643', ...)
# NOTE: raises TypeError - `source` is a ColumnDataSource instance, which
# is not callable
source()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[33], line 1
----> 1 source()

TypeError: 'ColumnDataSource' object is not callable
from numpy import cos, linspace, pi, sin

from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.layouts import gridplot
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
                          LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool)



# Low-level (bokeh.models) scatter of a copy of `df`, used to try out a very
# long legend label in a separate legend plot
source = ColumnDataSource(data=df.copy())

xdr = DataRange1d()
ydr = DataRange1d()

plot = Plot(
    x_range=xdr, y_range=ydr,
    width=600, height=600,
    min_border=0,
    toolbar_location='right',
)

# Use the column names as they are spelled everywhere else in this notebook
# ("PaCMAP 1" / "PaCMAP 2", with a space); the previous "PaCMAP1" /
# "PaCMAP2" did not match
circle = Circle(x="PaCMAP 1", y="PaCMAP 2", size=6, line_color="red", fill_color="orange", fill_alpha=0.6)
# From here on `circle` refers to the renderer returned by add_glyph
circle = plot.add_glyph(source, circle)

pan = PanTool()
wheel_zoom = WheelZoomTool()
preview_save = SaveTool()

plot.add_tools(pan, wheel_zoom, preview_save)

# Add axes
plot.add_layout(LinearAxis(), 'below')
plot.add_layout(LinearAxis(), 'left')
plot.add_layout(LinearAxis(), 'right')

# Legend with a single, deliberately long entry label
legend = Legend(
    items=[("circleeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee", [circle])],
    location="center", orientation="vertical",
    border_line_color="black",
    title='Example Title'
)
# Separate, toolbar-less plot that only hosts the legend
legend_plot = Plot(
    width=600, height=600,
    toolbar_location=None,
)
legend_plot.add_layout(legend, 'left')

# Combine the two plots into a gridplot
grid = gridplot([[plot, legend_plot]])

show(grid)
WARNING:bokeh.core.validation.check:W-1000 (MISSING_RENDERERS): Plot has no renderers: Plot(id='p15743', ...)
from numpy import cos, linspace, pi, sin

from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
                          LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool)

# Sample one sine wave on 400 points between -2*pi and 2*pi
x = linspace(-2*pi, 2*pi, 400)
y = sin(x)

source = ColumnDataSource(data=dict(x=x, y=y))

xdr, ydr = DataRange1d(), DataRange1d()

plot = Plot(x_range=xdr, y_range=ydr,
            width=600, height=600,
            min_border=0,
            toolbar_location='right')

# `circle` ends up bound to the renderer returned by add_glyph
circle = plot.add_glyph(
    source,
    Circle(x="x", y="y", size=6,
           line_color="red", fill_color="orange", fill_alpha=0.6),
)

pan, wheel_zoom, preview_save = PanTool(), WheelZoomTool(), SaveTool()
plot.add_tools(pan, wheel_zoom, preview_save)

# Add axes (Note it's important to add these before adding legends in side panels)
for side in ('below', 'left', 'right'):
    plot.add_layout(LinearAxis(), side)

def add_legend(location, orientation, side):
    """Attach a titled, single-entry legend for `circle` to `plot`.

    `location` and `orientation` are forwarded to the ``Legend`` model;
    `side` is the panel of `plot` the legend is added to.
    """
    plot.add_layout(
        Legend(
            items=[("circle", [circle])],
            location=location,
            orientation=orientation,
            border_line_color="black",
            title='Example Title',
        ),
        side,
    )


# Put a vertical legend in the panel below the plot
add_legend("center", "vertical", "below")

show(plot)
from bokeh.layouts import layout
# Label columns used by the plot
cols = [
    'PaCMAP Output',
    'WHO Classification',
    'FAB',
    'FLT3 ITD',
    'Age group (years)',
    'Complex Karyotype',
    'Primary Cytogenetic Code',
    'Karyotype',
]

# Attach the selected label columns to the training embedding, then turn the
# index into a regular column
df = x_train.join(y_train[cols])
df = df.reset_index()

# Use Bokeh's 'light_minimal' theme for the current document
curdoc().theme = 'light_minimal'

# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "WHO Classification" and "Karyotype" columns of the
    plotted source.
    """
    hover_fields = [("Diagnosis", "@{WHO Classification}"),
                    ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)

# Colour a single scatter renderer by the categories of one column
def create_scatter(df, p, hue):
    """Draw `df` on figure `p` as one scatter renderer coloured by `hue`.

    Rows with a NaN `hue` value are dropped. Colours are assigned through
    ``factor_cmap`` using ``custom_color_palette``, with the factors ordered
    by descending frequency, and the legend is grouped by `hue`.

    Returns the renderer produced by ``p.scatter``.
    """
    labelled = df[df[hue].notna()]
    factors = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    colour = factor_cmap(field_name=hue, palette=custom_color_palette,
                         factors=factors)
    return p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=labelled.copy(),
                     fill_alpha=0.8, size=5,
                     color=colour,
                     legend_group=hue)

# Create one tab (with its own empty figure) per label column; the last
# entry of `cols` is not given a tab
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')

# Fill each tab's figure; zip stops at the shorter input, so the extra last
# entry of `cols` is ignored
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols)]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the glyph size of the renderer in every tab; its initial
# value is the size of the first renderer
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")

# Heading shown together with the tabs and the slider
div = Div(text="<b>The AML Diagnostic Map</b>\nInteractive visualization of the pediatric AML methylome:",
          width=200, height=85)
# Bind the result to its own name: assigning it to `layout` would overwrite
# the imported bokeh.layouts.layout function for every later call
page = layout([[[div, tabs, slider]]])

show(page)
from bokeh.layouts import layout
# Label columns used by the plot
cols = [
    'PaCMAP Output',
    'WHO Classification',
    'FAB',
    'FLT3 ITD',
    'Age group (years)',
    'Complex Karyotype',
    'Primary Cytogenetic Code',
    'Karyotype',
]

# Attach the selected label columns to the training embedding, then turn the
# index into a regular column
df = x_train.join(y_train[cols])
df = df.reset_index()

# Use Bokeh's 'light_minimal' theme for the current document
curdoc().theme = 'light_minimal'

# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "WHO Classification" and "Karyotype" columns of the
    plotted source.
    """
    hover_fields = [("Diagnosis", "@{WHO Classification}"),
                    ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)

# Colour a single scatter renderer by the categories of one column
def create_scatter(df, p, hue):
    """Draw `df` on figure `p` as one scatter renderer coloured by `hue`.

    Rows with a NaN `hue` value are dropped. Colours are assigned through
    ``factor_cmap`` using ``custom_color_palette``, with the factors ordered
    by descending frequency, and the legend is grouped by `hue`.

    Returns the renderer produced by ``p.scatter``.
    """
    labelled = df[df[hue].notna()]
    factors = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    colour = factor_cmap(field_name=hue, palette=custom_color_palette,
                         factors=factors)
    return p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=labelled.copy(),
                     fill_alpha=0.8, size=5,
                     color=colour,
                     legend_group=hue)

# Create one tab (with its own empty figure) per label column; the last
# entry of `cols` is not given a tab
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')

# Fill each tab's figure; zip stops at the shorter input, so the extra last
# entry of `cols` is ignored
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols)]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the glyph size of the renderer in every tab; its initial
# value is the size of the first renderer
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")

# Heading shown together with the tabs and the slider
div = Div(text="<b>The AML Diagnostic Map</b>\nInteractive visualization of the pediatric AML methylome:",
          width=200, height=85)
# Bind the result to its own name: assigning it to `layout` would overwrite
# the imported bokeh.layouts.layout function, which the following code
# calls again without re-importing it
page = layout([[[div, tabs, slider]]])

show(page)
# Label columns used by the plot
cols = [
    'Primary Cytogenetic Code',
    'FAB',
    'FLT3 ITD',
    'Age group (years)',
    'WHO Classification',
    'Complex Karyotype',
    'Karyotype',
]

# Attach the selected label columns to the training embedding, then turn the
# index into a regular column
df = x_train.join(y_train[cols])
df = df.reset_index()

# Use Bokeh's 'light_minimal' theme for the current document
curdoc().theme = 'light_minimal'

# Build the empty figure that each tab draws its scatter plot on
def create_figure():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "index" and "Karyotype" columns of the plotted source.
    """
    hover_fields = [("Sample", "@index"), ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_fields,
    )
    return figure(**options)

# Colour a single scatter renderer by the categories of one column
def create_scatter(df, p, hue):
    """Draw `df` on figure `p` as one scatter renderer coloured by `hue`.

    Rows with a NaN `hue` value are dropped. Colours are assigned through
    ``factor_cmap`` using ``custom_color_palette``, with the factors ordered
    by descending frequency, and the legend is grouped by `hue`.

    Returns the renderer produced by ``p.scatter``.
    """
    labelled = df[df[hue].notna()]
    factors = labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    colour = factor_cmap(field_name=hue, palette=custom_color_palette,
                         factors=factors)
    return p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=labelled.copy(),
                     fill_alpha=0.8, size=5,
                     color=colour,
                     legend_group=hue)

# Imported here under a private alias because this code has no import of its
# own and the name `layout` may have been rebound to a layout object earlier
from bokeh.layouts import layout as _make_layout

# Create one tab per label column and title it with that same column, so the
# tab title always matches the colouring drawn in it. (The previous
# hand-written titles were paired with `cols` purely by position and did not
# line up with it.) The last entry of `cols` is not given a tab.
# NOTE(review): this drops the former 'PaCMAP Output' tab, for which `cols`
# has no matching column here - confirm that is acceptable
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=col) for col in cols[:-1]],
            tabs_location='left')

# Fill each tab's figure; zip stops at the shorter input, so the extra last
# entry of `cols` is ignored
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols)]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the glyph size of the renderer in every tab; its initial
# value is the size of the first renderer
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1, value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")

# Heading shown together with the tabs and the slider
div = Div(text="<b>The AML Diagnostic Map</b>\nInteractive visualization of the pediatric AML methylome:",
          width=200, height=85)
page = _make_layout([[[div, tabs, slider]]])

show(page)
from bokeh.layouts import layout

# Label columns joined onto the embedding. Named `cols` (as in the rest of
# the notebook) so the builtin `list` is no longer shadowed.
cols = ['Primary Cytogenetic Code', 'FAB', 'FLT3 ITD', 'Age group (years)',
        'WHO Classification', 'Complex Karyotype', 'Karyotype']

df = x_train.join(y_train[cols]).reset_index()  # join embedding with labels
# Constant column, so that one tab can show all samples in a single colour
df['PaCMAP Output'] = 'PaCMAP Output'

curdoc().theme = 'light_minimal'


def fig():
    """Return a new, fixed-size (600x600) figure with PaCMAP axis labels.

    The figure has the pan / wheel-zoom / reset / save tools and tooltips
    that read the "index" and "Karyotype" columns of the plotted source.
    """
    return figure(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom, reset, save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=[("Sample", "@index"),
                  ("Karyotype", "@Karyotype")])


def scatter(df, p, hue):
    """
    Scatter plot of embedding with color by hue

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; must contain "PaCMAP 1", "PaCMAP 2" and `hue` columns
    p : bokeh.plotting.figure.Figure
        Bokeh figure object
    hue : str
        Column name of df to color by

    Returns
    -------
    points : bokeh.models.renderers.GlyphRenderer
        Bokeh glyph renderer object
    """
    df = df[df[hue].notna()]  # keep only rows that have a hue value
    # Most frequent category first, so it receives the first palette colour
    factors = df[hue].value_counts().sort_values(ascending=False).index.to_list()
    return p.scatter(x="PaCMAP 1",
                     y="PaCMAP 2",
                     source=df.copy(),
                     fill_alpha=0.8,
                     size=5,
                     color=factor_cmap(field_name=hue,
                                       palette=custom_color_palette,
                                       factors=factors),
                     legend_group=hue)


def create_tabs(df):
    """
    Build the tabbed view: one tab per hue column, titled with that column

    The tabs are 'PaCMAP Output' followed by every entry of `cols` except
    the last one, which gives seven tabs - the number of renderers the
    slider code used to expect.

    Parameters
    ----------
    df : pandas.DataFrame
        Embedding joined with the label columns

    Returns
    -------
    tabs : bokeh.models.Tabs
        Tabs widget whose panels each hold one coloured scatter figure
    """
    hues = ['PaCMAP Output'] + cols[:-1]

    def create_tab(title, hue):
        p = fig()
        scatter(df, p, hue=hue)
        p.toolbar.logo = None
        return TabPanel(child=p, title=title)

    return Tabs(
        tabs=[create_tab(hue, hue) for hue in hues],
        tabs_location='left'
    )


tabs = create_tabs(df)

# Each tab's figure holds exactly one glyph renderer: the one added by
# scatter()
renderers = [tab.child.renderers[0] for tab in tabs.tabs]

slider = Slider(
    title="Adjust datapoint size",
    start=0,
    end=20,
    step=1,
    value=renderers[0].glyph.size)

# Drive the glyph size of every tab from the single slider
for renderer in renderers:
    slider.js_link("value", renderer.glyph, "size")

# create layout (bound to its own name so the imported `layout` function is
# not overwritten)
page = layout([[[tabs, slider]]])

# show result
show(page)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 88
     72     return tabs
     75 div = Div(
     76     text="""
     77           <b> The AML Diagnostic Map</b>
   (...)
     80     width=200,
     81     height=85)
     83 slider = Slider(
     84     title="Adjust datapoint size",
     85     start=0,
     86     end=20,
     87     step=1,
---> 88     value=(points1.glyph.size))
     90 slider.js_link("value", points1.glyph, "size")
     91 slider.js_link("value", points2.glyph, "size")

NameError: name 'points1' is not defined

Table of Contents#

Specific Aims